UMAP Visualizations

In this section, we visualize the embedding spaces of the models in a compressed 2D/3D space to delineate their manifold structure.

1) Loading Data

import numpy as np
import pandas as pd
import seaborn as sns
import deciphering_enigma
import matplotlib.pyplot as plt

#location of the experiment config file
path_to_config = './config.yaml'

#parse the config and keep the dataset path it declares
exp_config = deciphering_enigma.load_yaml_config(path_to_config)
dataset_path = exp_config.dataset_path

#register the experiment directory, then keep only the 'mic1_normloud.wav' recordings
audio_files = [
    wav_path
    for wav_path in deciphering_enigma.build_experiment(exp_config)
    if wav_path.endswith('mic1_normloud.wav')
]
print(f'Dataset has {len(audio_files)} samples')
Dataset has 44455 samples
#extract metadata from file name convention
#(returns the metadata table together with the audio format, which is reused below)
metadata_df, audio_format = deciphering_enigma.extract_metadata(exp_config, audio_files)
#drop the 'xx' and 'Label' columns in place; the 'ID' column is kept and passed below as speaker ids
metadata_df.drop(columns=['xx', 'Label'], inplace=True)

#load audio files as torch tensors to get ready for feature extraction
#NOTE(review): the pasted output suggests tensors are cached per experiment -- confirm in load_dataset
audio_tensor_list = deciphering_enigma.load_dataset(audio_files, cfg=exp_config, speaker_ids=metadata_df['ID'], audio_format=audio_format)
Audio Tensors are already saved for vctk_umap_experiment
#compute the duration (in seconds) of each utterance to use it later in the plots
import soundfile as sf
from tqdm import tqdm
#only the file header is read: sf.info reports frames / samplerate as `duration`,
#which equals len(audio) / sr without decoding every sample of every file
dur = [sf.info(file).duration for file in tqdm(audio_files)]
100%|████████████████████████████████████| 44455/44455 [05:10<00:00, 143.28it/s]

2) Generating Embeddings

#generate speech embeddings
#the result is used later as a mapping: it is iterated by key (model name) and indexed per model
feature_extractor = deciphering_enigma.FeatureExtractor()
embeddings_dict = feature_extractor.extract(audio_tensor_list, exp_config)

3) UMAP Dimensionality Reduction

import os
import numpy as np
import pandas as pd

import scipy
from scipy.spatial.distance import pdist

from umap import UMAP
from pacmap import PaCMAP
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ParameterGrid

from deciphering_enigma.settings import _hyperparams_grid_reducers, _optimize_function, _knn, _subsetsize

class ReducerTuner():
    """Tuner for dimensionality reduction methods.

    Implements grid-search across hyperparameters for each dimensionality reduction method preset in the settings script.
    NOTE: any method added in the settings script should follow sklearn implementation.
    Tunes reduced dimensions by optimizing local and global structure metrics.
    Saves tuned results for each method as a pandas dataframe.
    """

    def __init__(self):
        # Search grid and metric settings all come from deciphering_enigma.settings.
        self.reducer_params_grid = _hyperparams_grid_reducers
        self.optimize_func = _optimize_function
        self.knn = _knn
        self.subsetsize = _subsetsize

    @staticmethod
    def _results_path(dataset_name, model_name):
        """Return the CSV path where the tuned 3D embeddings of a model are stored."""
        return f'../{dataset_name}/{model_name}/dim_reduction_3d.csv'

    def embedding_quality(self, X, Z, knn=10, subsetsize=1000):
        """Score how well the reduced space ``Z`` preserves the structure of ``X``.

        Args:
            X: 2D array of the original (high-dimensional) points, one row per sample.
            Z: 2D array of the reduced points, rows aligned with ``X``.
            knn: number of nearest neighbours compared per point (local metric).
            subsetsize: number of points sampled for the pairwise-distance
                comparison (global metric); capped at the number of samples.

        Returns:
            Tuple ``(mnn, rho)``: ``mnn`` is the average fraction of the ``knn``
            nearest neighbours shared between ``X`` and ``Z``; ``rho`` is the
            Spearman correlation between pairwise distances in ``X`` and ``Z``
            computed on the random subset.
        """
        # Imported explicitly: a bare `import scipy` does not guarantee that the
        # `scipy.stats` submodule is loaded.
        from scipy.stats import spearmanr

        n_samples = X.shape[0]

        # Local structure: overlap of each point's neighbourhood in both spaces.
        neighbours_high = NearestNeighbors(n_neighbors=knn).fit(X).kneighbors(return_distance=False)
        neighbours_low = NearestNeighbors(n_neighbors=knn).fit(Z).kneighbors(return_distance=False)
        shared = sum(len(set(high) & set(low)) for high, low in zip(neighbours_high, neighbours_low))
        mnn = shared / n_samples / knn

        # Global structure: rank correlation of pairwise distances on a subset.
        # Sampling without replacement avoids duplicated points, which would add
        # zero-distance pairs to both spaces and bias the correlation.
        subset = np.random.choice(n_samples, size=min(subsetsize, n_samples), replace=False)
        d1 = pdist(X[subset, :])
        d2 = pdist(Z[subset, :])
        rho = spearmanr(d1, d2).correlation
        return (mnn, rho)

    def get_reducer(self, name):
        """Return the reducer class registered under ``name``.

        Raises:
            AttributeError: if ``name`` is not one of 'PCA', 'tSNE', 'UMAP', 'PaCMAP'.
        """
        if name == 'PCA':
            return PCA
        elif name == 'tSNE':
            return TSNE
        elif name == 'UMAP':
            return UMAP
        elif name == 'PaCMAP':
            return PaCMAP
        else:
            raise AttributeError(f'This reducer {name} is not included...')

    def fit_eval(self, embeddings, reducer):
        """Standardize ``embeddings``, reduce them with ``reducer`` and score the result.

        Returns:
            Tuple ``(reduced_embeddings, local_val, global_val)`` where the two
            scores come from :meth:`embedding_quality` evaluated against the
            standardized embeddings.
        """
        stand_embeddings = StandardScaler().fit_transform(embeddings)
        reduced_embeddings = reducer.fit_transform(stand_embeddings)
        local_val, global_val = self.embedding_quality(stand_embeddings, reduced_embeddings, knn=self.knn, subsetsize=self.subsetsize)
        return reduced_embeddings, local_val, global_val

    def save_results_pandas(self, reducers_embeddings_dict, metadata=None, model_name=None, dataset_name=None):
        """Write the tuned 3D embeddings (and optional metadata) to a CSV file.

        Args:
            reducers_embeddings_dict: mapping ``reducer name -> {'Local': array,
                'Global': array}``; the first three columns of each array are saved.
            metadata: optional DataFrame appended column-wise to the embeddings;
                skipped when ``None``.
            model_name: model sub-directory of the output path.
            dataset_name: dataset sub-directory of the output path.
        """
        save_path = self._results_path(dataset_name, model_name)
        combined_column_obj = pd.MultiIndex.from_product([reducers_embeddings_dict.keys(),['Local', 'Global'], ['Dim1', 'Dim2', 'Dim3']], names=["Method", "Optimized Metric", "Dim"])
        df = pd.DataFrame(data=[], columns=combined_column_obj)
        for name, tuned in reducers_embeddings_dict.items():
            for metric in ('Local', 'Global'):
                for dim_idx, dim_name in enumerate(('Dim1', 'Dim2', 'Dim3')):
                    # Plain column assignment fills the initially empty frame;
                    # its row index is created from the first assigned array.
                    df[(name, metric, dim_name)] = tuned[metric][:, dim_idx]
        if metadata is not None:
            temp_df = metadata.copy()
            # Pad metadata column names to three levels so they line up with the embedding columns.
            temp_df.columns = pd.MultiIndex.from_tuples(map(lambda x: (x, '', ''), temp_df.columns))
            df = pd.concat([df, temp_df], axis=1)
        # The per-model directory may not exist yet on a first run.
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        df.to_csv(save_path)

    def tune_reducer(self, embeddings, metadata=None, dataset_name=None, model_name=None, save_results = True, save_path='./'):
        """Grid-search every configured reducer and keep its best 3D embeddings.

        For each reducer in the settings grid, every hyperparameter combination
        is fitted (``n_components=3``, ``random_state=42``) and scored; the
        embeddings with the highest local score and the highest global score are
        kept. Nothing is computed when the results CSV of the model already exists.

        Args:
            embeddings: 2D array of embeddings to reduce.
            metadata: optional DataFrame saved next to the reduced embeddings.
            dataset_name: dataset sub-directory of the results path.
            model_name: model sub-directory of the results path.
            save_results: whether to write the tuned embeddings to disk.
            save_path: currently unused; results are always written to
                ``../{dataset_name}/{model_name}/dim_reduction_3d.csv``.
        """
        if os.path.isfile(self._results_path(dataset_name, model_name)):
            print(f'Tuned Reduced Embeddings already saved for {model_name} model!')
            return

        reducers_embeddings_dict = {}
        n_reducers = len(self.reducer_params_grid)
        for i, (reducer_name, reducer_params) in enumerate(self.reducer_params_grid.items()):
            print(f'Reducer {i+1}/{n_reducers}: {reducer_name}...')
            reducer_class = self.get_reducer(reducer_name)
            all_embeddings = []
            local_metrics = []
            global_metrics = []
            for params in ParameterGrid(reducer_params):
                print(params)
                reducer = reducer_class(n_components=3, random_state=42, **params)
                reduced_embeddings, local_metric, global_metric = self.fit_eval(embeddings, reducer)
                all_embeddings.append(reduced_embeddings)
                local_metrics.append(local_metric)
                global_metrics.append(global_metric)
            # Keep one embedding per optimization target (they may coincide).
            reducers_embeddings_dict[reducer_name] = {
                'Local': all_embeddings[np.argmax(local_metrics)],
                'Global': all_embeddings[np.argmax(global_metrics)],
            }
        if save_results:
            self.save_results_pandas(reducers_embeddings_dict, metadata, model_name, dataset_name)
#tune the reducers once per model; results are written under ../{dataset_name}/{model_name}/
tuner = deciphering_enigma.ReducerTuner()
for model_name, model_embeddings in embeddings_dict.items():
    print(f'{model_name}:')
    tuner.tune_reducer(model_embeddings, metadata=metadata_df, dataset_name=exp_config.dataset_name, model_name=model_name)
#2D interactive plot function using plotly
import plotly.express as px
import plotly.offline as py
from plotly.subplots import make_subplots
def visualize_embeddings(df, label_name, metrics=[], axis=[], acoustic_param={}, opt_structure='Local', red_name='PCA', row=1, col=1, hovertext='', label=''):
    """Add a 2D scatter of reduced embeddings to one cell of a subplot figure.

    Reads the ``Dim1``/``Dim2`` columns stored under ``(red_name, opt_structure)``
    in ``df``, colors the points by ``df[label_name]`` and uses ``df['AudioNames']``
    as hover title. The traces are added to ``axis`` at position (``row``, ``col``).
    ``metrics``, ``acoustic_param``, ``hovertext`` and ``label`` are accepted but
    not used by the body.
    """
    x_values = df[red_name, opt_structure, 'Dim1']
    y_values = df[red_name, opt_structure, 'Dim2']
    scatter_fig = px.scatter(x=x_values, y=y_values, color=df[label_name], hover_name=df['AudioNames'])
    scatter_fig.layout.update(showlegend=False)
    #move the generated traces into the target subplot figure
    scatter_traces = list(scatter_fig.select_traces())
    axis.add_traces(scatter_traces, rows=row, cols=col)
#which tuned variant to plot; passed as opt_structure to select the 'Local' or 'Global' columns
optimize = 'Global'
#metadata column used to color the points
label = 'ID'

1. Log-Mel-Spectrogram

def _load_reduced_embeddings(model_name):
    """Read the saved 2D reduction table of `model_name` and blank its placeholder header labels."""
    df = pd.read_csv(f'../{exp_config.dataset_name}/{model_name}/dim_reduction.csv', header=[0,1,2])
    #metadata columns have empty lower header levels, which read_csv names 'Unnamed: ...'
    df.rename(columns={'Unnamed: 5_level_1': '', 'Unnamed: 5_level_2': '', 'Unnamed: 6_level_1': '', 'Unnamed: 6_level_2': ''},inplace=True)
    return df


def _add_sentence_column(df):
    """Add the second '_'-separated token of each audio name as the 'Scentence' column."""
    df['Scentence'] = df['AudioNames'].apply(lambda x: x.split('_')[1])


def _add_log_duration_column(df):
    """Add the natural log of the utterance durations (`dur`) as the 'Duration' column."""
    df['Duration'] = np.log(dur)
    df['Duration'] = df['Duration'].astype(float)


def _plot_umap(model_name, label_name, prepare=None, rows=1, cols=1, width=1000, height=800):
    """Show the UMAP scatter of one model colored by `label_name`.

    `prepare`, when given, is called with the loaded table before plotting so it
    can add the column to color by. Returns the table and the figure.
    """
    fig = make_subplots(rows=rows, cols=cols)
    df = _load_reduced_embeddings(model_name)
    if prepare is not None:
        prepare(df)
    visualize_embeddings(df, label_name, metrics=[], axis=fig, opt_structure=optimize, red_name='UMAP')
    fig.update_layout(
        autosize=False,
        width=width,
        height=height, showlegend=False,)
    fig.show()
    return df, fig


#NOTE(review): these models use a 2x4 grid on a 4000x2000 canvas although only cell (1, 1)
#receives traces; kept as-is to reproduce the existing figures
_wide_grid = dict(rows=2, cols=4, width=4000, height=2000)

#(model name, column to color by, extra plot options), in display order
_umap_plots = [
    ('Log-Mel-Spectrogram', label, {}),
    ('BYOL-A_default', label, {}),
    ('BYOL-I_default', label, {}),
    ('BYOL-S_default', label, {}),
    ('BYOL-S_cvt', label, {}),
    ('Hybrid_BYOL-S_cvt', label, {}),
    ('APC', label, {}),
    ('TERA', label, {}),
    ('Wav2Vec2_latent', label, _wide_grid),
    ('Wav2Vec2', label, _wide_grid),
    ('HuBERT_latent', label, {}),
    ('HuBERT_best', label, {}),
    ('HuBERT', label, {}),
    ('HuBERT', 'Scentence', {'prepare': _add_sentence_column}),
    ('HuBERT', 'Duration', {'prepare': _add_log_duration_column}),
    ('Data2Vec_latent', label, {}),
    ('Data2Vec', label, _wide_grid),
]

for model_name, color_by, plot_options in _umap_plots:
    df, fig = _plot_umap(model_name, color_by, **plot_options)
import plotly.express as px
import plotly.offline as py
from plotly.subplots import make_subplots
def visualize_3d_embeddings(df, label_name, metrics=[], axis=[], acoustic_param={}, opt_structure='Local', red_name='PCA', row=1, col=1, hovertext='', label=''):
    """Build a 3D scatter of reduced embeddings and return it.

    Reads the ``Dim1``/``Dim2``/``Dim3`` columns stored under
    ``(red_name, opt_structure)`` in ``df``, colors the points by
    ``df[label_name]`` and uses ``df['AudioNames']`` as hover title.
    ``metrics``, ``axis``, ``acoustic_param``, ``row``, ``col``, ``hovertext``
    and ``label`` are accepted but not used by the body.

    Returns:
        The plotly figure with its legend hidden. Previously the figure was
        built and discarded, so callers had no way to display it.
    """
    traces = px.scatter_3d(x=df[red_name, opt_structure, 'Dim1'], y=df[red_name, opt_structure, 'Dim2'], z=df[red_name, opt_structure, 'Dim3'], color=df[label_name], hover_name=df['AudioNames'])
    traces.layout.update(showlegend=False)
    return traces
#3D UMAP (global-optimized) scatter for each selected model, colored by `label`
for model_name in ('HuBERT_best', 'TERA'):
    df = pd.read_csv(f'../{exp_config.dataset_name}/{model_name}/dim_reduction_3d.csv', header=[0,1,2])
    #blank the placeholder labels read_csv assigns to the empty lower header levels of the metadata columns
    df.rename(columns={'Unnamed: 7_level_1': '', 'Unnamed: 7_level_2': '', 'Unnamed: 8_level_1': '', 'Unnamed: 8_level_2': ''},inplace=True)
    fig = px.scatter_3d(
        x=df['UMAP', 'Global', 'Dim1'],
        y=df['UMAP', 'Global', 'Dim2'],
        z=df['UMAP', 'Global', 'Dim3'],
        color=df[label],
        hover_name=df['AudioNames'],
    )
    fig.update_layout(autosize=False, width=1000, height=1000, showlegend=False)
    fig.show()